## Setup locations
base = "01-cytograms"
here::i_am("01-cytograms.Rmd")
knitr::opts_chunk$set(fig.path = here::here("data", base, 'figures/'))
datadir = outputdir = here::here("data", base)
if(!dir.exists(outputdir)) dir.create(outputdir)
Here is a script to do the following; summarizing the workflow:
Filter particles from the 3-minute level cytograms (i.e. remove min & max in all three dimensions).
Transform fsc_small to diam.
Combine 3-minute level cytograms into hourly cytograms.
Bin this data to 40 equally sized bins in each axis (diam, pe, chl). Each bin value would be the sum (not the count) of the Qc (quantity of carbon) of the particles in that bin.
Here are all pilot cruises:
Here, we outline the actual steps to be taken on a server with a SLURM workload manager. Follow these steps (manual steps for now, but to be streamlined!):
Upload the .vct files (original, particle-level data files) to the server.
# for cruisename in KM1427 KM1510 KM1502 KM1601 KM1602 KOK1604 KOK1606 KOK1607 KOK1608
# for cruisename in KM1427 KM1510 KM1502 KM1601 KM1602 KOK1604 KOK1606 KOK1607 KOK1608 KOK1609
# for cruise_id in MGL1704 KM1708 KM1709 KM1712 KM1713 KM1717
# for cruise_id in KM1802 KM1805 FK180310-1 FK180310-2 KOK1801 KOK1803
# These are the three cruises that still needed to be uploaded.
for cruise_id in FK180310-1 FK180310-2 MGL1704
do
## Figure out how to convert between $cruisename and $cruise_id
# cruise_id=FK180310-1
# cruise_id=FK180310-2
# cruise_id=KOK1803
from=$cruise_id/orig
to=discovery:scratchdir/output/flowmixapp/data/01-cytograms/$cruise_id/.
rsync -av $from $to
done
for cruisename in KM1427 KM1510 KM1502 KM1601 KM1602 KOK1604 KOK1606 KOK1607 KOK1608 KOK1609
for cruise_id in MGL1704 KM1708 KM1709 KM1712 KM1713 KM1717
for cruise_id in KM1802 KM1805 FK180310-1 FK180310-2 KOK1801 KOK1803
for cruise_id in KM1513;
do
mkdir $cruise_id
mkdir $cruise_id/orig
done
Use a slurm script run-bin.slurm directly, or, use run_bin KM1508 which uses the helper function:
run_bin (){
cruise_id=$1
sbatch --export=cruise_id=$cruise_id run-bin.slurm
return 0
}
The SLURM script ~/scripts/flowmixapp/run-bin.slurm is here:
#!/bin/bash
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --mem-per-cpu=10gb
#SBATCH --cpus-per-task=6
#SBATCH --time=1-00:00:00
#SBATCH --export=none # Ensures job gets a fresh login environment
#SBATCH --mail-user=robohyun66@gmail.com
#SBATCH --mail-type=ALL
#SBATCH --job-name=bin
#SBATCH --output=./logs/%A_%a.log
#SBATCH --error=./logs/%A_%a.err
# Set the session up to use R
module load gcc/8.3.0
module load openblas/0.3.8
module load r/3.6.3
Rscript bin.R \
mc.cores=6\
cruise_id=$cruise_id\
exit
which calls the script ~/scripts/flowmixapp/bin.R (which uses helpers from ~/scripts/flowmixapp/01-helpers.R), shown here:
(Note, this cytogram data is rescaled so that the diameter is between 0 and 8.)
## TODO: copy this script into here.
This saves the cytograms (along with the covariates X, and other things like lat, lon, and time) to an RDS file (e.g. KM1508-datobj.RDS), on the server.
Download these files from the server to ~/repos/flowmixapp/data/01-cytograms/KM1508/data/., using this script:
all_cruise_id="KM1508 KM1512 KM1513 KOK1515 KM1518 KM1601 KM1602 KOK1604 KOK1606 KOK1607 KOK1608 KOK1609 MGL1704 KM1708 KM1709 KM1713 KM1717 KOK1801 KM1712 KM1713 KM1802 KM1805 KOK1803 FK180310-1 FK180310-2"
for cruise_id in all_cruise_id
do
echo $cruise_id
from=sangwonh@discovery.usc.edu:scratchdir/output/flowmixapp/data/01-cytograms/$cruise_id/data/$cruise_id-datobj.RDS
to=~/repos/flowmixapp/data/01-cytograms/$cruise_id/data
mkdir -p $to
rsync -auv $from $to
done
for cruise_id in all_cruise_id
do
from=~/repos/flowmixapp/data/01-cytograms/$cruise_id/viz/*
to=~/repos/flowmixapp/data/01-cytograms/$cruise_id/viz-no-wind/.
mkdir -p $to
cp $from $to
done
(Assume the processed and binned data files, e.g. KM1508-datobj.Rdata, have been downloaded.)
We’ll load and plot this cytogram data, in several ways. First, produce 1d plots:
all_cruise_id = c(c("KM1508", "KM1512", "KM1513", "KOK1515", "KM1518"),
c("KM1601", "KM1602", "KOK1604", "KOK1606"),
c("KOK1607", "KOK1608", "KOK1609"),
c("MGL1704", "KM1708","KM1709", "KM1713", "KM1717", "KOK1801"),
c("KM1712", "KM1713", "KM1802", "KM1805", "KOK1803", "FK180310-1", "FK180310-2"))
qctablist = list()
for(cruise_id in all_cruise_id){
cat("### ", cruise_id, "\n")
## Load the binned 3d data
filename = paste0(cruise_id, "-datobj.RDS")
ybin_obj = readRDS(file = file.path(datadir, cruise_id, "data", filename))
ylist = ybin_obj$ylist
qclist = ybin_obj$biomass_list
## Make plots of total QC at each time point
qc_over_time = qclist %>% lapply(sum) %>% unlist()
qctab = tibble(time = names(ylist) %>% lubridate::as_datetime(),
qcsum = as.numeric(qc_over_time))
qctablist[[cruise_id]] = qctab
p = qctab %>%
ggplot() +
geom_line(aes(x = time, y = qcsum), col = rgb(0,0,0,0.2)) +
geom_point(aes(x = time, y = qcsum), cex = .5) +
ylab("Sum of QC over time") +
ggtitle(cruise_id) +
scale_x_datetime(date_breaks = "6 hour", labels = scales::date_format("%b %d - %H:%M"))
p = p + theme(axis.text.x = element_text(angle = 25, vjust = 1.0, hjust = 1.0))
p = p + theme(legend.position="none")
plot(p)
## Make lat/lon plots
X = ybin_obj$X
p = X %>% select(time, lat, lon) %>%
pivot_longer(-one_of("time")) %>%
ggplot() +
facet_wrap(~name, scale="free_y", ncol=1) +
geom_line(aes(x=time, y=value)) +
scale_x_datetime(date_breaks = "6 hour", labels = scales::date_format("%b %d - %H:%M"))
p = p + theme(axis.text.x = element_text(angle = 25, vjust = 1.0, hjust = 1.0))
p = p + theme(legend.position="none")
plot(p)
## Make 1d cytogram plot
y_qc_list = Map(function(y, qc){ as_tibble(y) %>% add_column(qc = qc) }, ylist, qclist)
for(dimname in c("diam", "chl", "pe")){
## Summarize to 1d binned data
y_qc_list_1d = y_qc_list %>% purrr::map(. %>% group_by(!!as.name(dimname)) %>% summarise(qc=sum(qc)))
qclist_1d = y_qc_list_1d %>% purrr::map(.%>% dplyr::pull(qc))
ylist_1d = y_qc_list_1d %>% purrr::map(.%>% dplyr::pull(!!as.name(dimname)))
## Scaling or not?
for(scale_at_each_time in c(FALSE, TRUE)){
if(scale_at_each_time) qclist_1d = qclist_1d %>% lapply(function(qc) qc/sum(qc))
## Make plots
mytitle = paste0(cruise_id, " - ", dimname)
if(scale_at_each_time) mytitle = paste0(mytitle, ", density at each time")
p = flowmix::bin_plot_1d(ylist_1d, qclist_1d)
p = p + ggtitle(mytitle)
p = p + theme_minimal()
p = p + scale_x_datetime(date_breaks = "6 hour", labels = scales::date_format("%b %d - %H:%M"))
p = p + theme(axis.text.x = element_text(angle = 25, vjust = 1.0, hjust = 1.0))
p = p + theme(legend.position="none")
plot(p)
## Save to file as well.
filename = paste0(cruise_id, "-1d-", dimname, ".png")
if(scale_at_each_time) filename = paste0(cruise_id, "-1d-", dimname, "-density.png")
if(!file.exists(filename)){
ggsave(file = file.path(datadir, cruise_id, "viz", filename),
width = 10, height = 3, units = "in", dpi = 300, p)
}
}
}
cat("\n\n")
}
saveRDS(qctablist, file = file.path(outputdir, "qctablist.RDS"))
Next, 2d plots made into videos:
(The png figures and mp4 video will be saved to e.g. data/01-cytogram/MGL1704/viz. These are too large to place in github; instead, they have been placed here: Dropbox link )
## Load data
for(cruise_id in all_cruise_id){
cat('###', cruise_id,' \n')
filename = paste0(cruise_id, "-datobj.RDS")
ybin_obj = readRDS(file = file.path(datadir, cruise_id, "data", filename))
ylist = ybin_obj$ylist
qclist = ybin_obj$biomass_list
dir.create(file.path(datadir, cruise_id))
## Create the series of 2d plots
TT = length(ylist)
coul <- colorRampPalette(RColorBrewer::brewer.pal(8, "RdYlBu"))(25) %>% rev()
for(tt in 1:TT){
## See if file has already been made
filename = paste0(tt, ".png")
plotfile = file.path(datadir, cruise_id, "viz", filename)
p = plot_2d_threepanels(ylist = ylist, countslist = qclist,
tt = tt, colours = coul, cruise_id = cruise_id)
if(!file.exists(plotfile)){
ggsave(file = plotfile, width = 12, height = 4, units = "in", dpi = 300, p)
}
if(tt == 1){
oneplotfile = file.path(datadir, cruise_id, "viz", paste0(tt, ".jpeg"))
ggsave(file = oneplotfile, width = 12, height = 4, units = "in", dpi = 300, p)
}
}
## Make into a video
videofile = file.path(outputdir, cruise_id, "viz", paste0(cruise_id, "-data.mp4"))
if(!file.exists(videofile)){
setwd(file.path(datadir, cruise_id, "viz"))
system(paste0("ffmpeg -y -framerate 3 -i %d.png ", cruise_id, "-data.mp4"))
}
## Embed video (Not doing this anymore, because the video file size is too big.)
if(FALSE){
cat(paste0('<video width="1280" height="720" controls> <source src="', videofile, '" type="video/mp4"> </video>'))
}
## Instead, we'll just embed one frame
cat(paste0(",
")"))
cat('\n\n')
}
knitr::knit_exit()